import gymnasium as gym
import tensorflow as tf
import numpy as np
import tensorflow_probability as tfp
import tensorlayer as tl

# Create the Pong environment. Environment IDs are case-sensitive and the
# version suffix must be a lowercase "-v<N>", so "Pong-V0" does not resolve.
env = gym.make("Pong-v0")
# Gymnasium's reset() returns an (observation, info) pair; keep only the
# observation so later code receives the frame itself, not a tuple.
observation, _ = env.reset()
# NOTE(review): presumably holds the previous processed frame once the
# training loop starts — confirm against the code that assigns it.
prev_x = None
running_reward = None  # unset until a first value is assigned elsewhere
reward_sum = 0
episode_number = 0

# Empty accumulators; they are filled by code outside this section.
xs, ys, rs = [], [], []
epx, epy, epr = [], [], []

# Create the model. The input shape leaves the first dimension unspecified
# (None) and uses D, defined elsewhere in the file, for the second.
model = get_model([None, D])
train_weights = model.trainable_weights
# Create the optimizer. Keras' RMSprop expects `learning_rate` (the `lr`
# alias is deprecated and ignored by current optimizers) and calls the
# squared-gradient moving-average discount `rho`. The legacy `decay` kwarg
# was learning-rate decay instead, and current Keras optimizers reject it.
# NOTE(review): assumes decay_rate is the RMSProp discount factor — confirm
# where decay_rate is defined.
optimizer = tf.optimizers.RMSprop(learning_rate=learning_rate, rho=decay_rate)